####This R script replicates the models and figures presented in 
####"When Reformers Become Spoilers: Discretionary Implementation of Extraordinary Restitution Reform under Extractivism in Colombia"


library(dplyr)
library(ggplot2)
library(stargazer)
library(geodata)
library(terra)  
library(sf)
library(stringr)

#setwd("")

load("data_analysis.RData")

## Keep only if case entered during Santos
base_foranalysis <- base_foranalysis_april24 %>%
  filter(min_fecha_adminis < as.Date("2018-08-07"))

base_foranalysis$days_until_exit <- ifelse(
  is.na(base_foranalysis$tiempo_admin_salieron_registro),
  base_foranalysis$tiempo_admin_if_trapped,
  base_foranalysis$tiempo_admin_salieron_radi
)

base_foranalysis$trapped <- ifelse(
  is.na(base_foranalysis$tiempo_admin_salieron_registro),
  1,
  0
)

base_foranalysis$tiempo_admin_both <- ifelse(
  !is.na(base_foranalysis$tiempo_admin_salieron_radi),
  base_foranalysis$tiempo_admin_salieron_radi,
  base_foranalysis$tiempo_admin_if_trapped
)

######### Analysis using LRU Local Office as key DV for Alternative Hypothesis:
#### Analysis for alternative hypothesis including Table 3 in Appendix,
#### Figure 7 and 8 in the paper, and Table 4 in the Appendix.

table(base_foranalysis$direccion_territorial)

base_foranalysis$direccion_territorial_factor <-
  relevel(factor(base_foranalysis$direccion_territorial), ref = "Bogotá")

m1b <- lm(tiempo_admin_both ~ direccion_territorial_factor, data = base_foranalysis)
m2  <- lm(success ~ direccion_territorial_factor, data = base_foranalysis)

stargazer(m1b, m2, digits = 2, omit.stat = c("f", "ser"))

table_summary <- base_foranalysis %>%
  group_by(direccion_territorial) %>%
  summarise(
    Observations = n(),
    Mean_Days    = mean(tiempo_admin_both, na.rm = TRUE),
    Prob_Success = mean(success, na.rm = TRUE)
  )

summary_data <- base_foranalysis %>%
  group_by(direccion_territorial_factor) %>%
  summarise(
    Avg_Days      = mean(tiempo_admin_both, na.rm = TRUE),
    Std_Error     = sd(tiempo_admin_both, na.rm = TRUE) / sqrt(n()),  # Standard Error
    Observations  = n(),
    Avg_success   = mean(success, na.rm = TRUE),
    Std_Error_suc = sd(success, na.rm = TRUE) / sqrt(n())
  )

summary_data <- summary_data %>%
  mutate(
    Significant = ifelse(
      direccion_territorial_factor %in% c(
        "Antioquia", "Bolívar", "Cauca", "Córdoba", "Magdalena", "Bogotá"
      ),
      0, 1
    ),
    Significant_SUC = ifelse(
      direccion_territorial_factor %in% c(
        "Antioquia", "Bolívar", "Córdoba", "Magdalena", "Bogotá"
      ),
      0, 1
    )
  )

# Average number of days by LRU local office
# (colors those that are statistically different from Bogotá)
ggplot(summary_data,
       aes(x = direccion_territorial_factor,
           y = Avg_Days,
           fill = as.factor(Significant))) +
  geom_bar(stat = "identity", alpha = 0.7) +
  geom_errorbar(aes(ymin = Avg_Days - Std_Error,
                    ymax = Avg_Days + Std_Error),
                width = 0.2) +
  labs(
    x = "Regional Office",
    y = "Average Number of Days"
  ) +
  scale_fill_manual(
    values = c("grey25", "grey88"),
    labels = c("Not Significantly Different", "Significantly Different"),
    name   = "Difference from Bogotá"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Average success by office
ggplot(summary_data,
       aes(x = direccion_territorial_factor,
           y = Avg_success,
           fill = as.factor(Significant_SUC))) +
  geom_bar(stat = "identity", alpha = 0.7) +
  geom_errorbar(aes(ymin = Avg_success - Std_Error_suc,
                    ymax = Avg_success + Std_Error_suc),
                width = 0.2) +
  labs(
    x = "Regional Office",
    y = "Probability of Success"
  ) +
  scale_fill_manual(
    values = c("grey25", "grey88"),
    labels = c("Not Significantly Different", "Significantly Different"),
    name   = "Difference from Bogotá"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Restrict sample to after IMEI for regression analysis
base_afectadas <- base_foranalysis %>%
  filter(affected_amei_carac == 1)

mean(base_afectadas$tiempo_admin_both)
sd(base_afectadas$tiempo_admin_both)

#### Subsets specifically for plots
subset_data <- base_foranalysis[!is.na(base_foranalysis$tiempo_admin_salieron_registro), ]
subset_data <- subset(subset_data, min_fecha_adminis < as.Date("2018-08-07"))

# Order subsetted data by min_fecha_adminis
ordered_data <- subset_data[order(subset_data$min_fecha_adminis), ]

#### Figure 6
ggplot(
  ordered_data,
  aes(
    x     = min_fecha_adminis,
    y     = tiempo_admin_salieron_registro,
    shape = factor(affected_amei_carac)
  )
) +
  geom_point(size = 3) +
  geom_vline(
    xintercept = as.Date("2015-05-29"),
    linetype   = "dashed",
    color      = "black"
  ) +
  labs(
    title = "Days in administrative stage before and after OMMEI",
    x     = "Date entered administrative",
    y     = "Number of days",
    shape = "Affected"
  ) +
  scale_shape_manual(values = c(16, 17)) +  # 16 = circle, 17 = triangle
  theme_bw() +
  theme(
    panel.background = element_rect(fill = "white"),
    legend.position  = "bottom",
    text             = element_text(size = 16)
  )

#### Replicates Table 2 in paper: outcome variable: success.

mean(base_afectadas$success)
sd(base_afectadas$success)

m1_2 <- lm(success ~ log(num_titulos + 1), data = base_afectadas)
m2_2 <- lm(success ~ log(num_titulos + 1) + Familias + comunidad_negra +
             hectareas_afectadas_cart, data = base_afectadas)
m3_2 <- lm(success ~ log(num_titulos + 1) + Familias + comunidad_negra +
             hectareas_afectadas_cart + factor(direccion_territorial),
           data = base_afectadas)

m4_2 <- lm(success ~ top_50_dummy, data = base_afectadas)
m5_2 <- lm(success ~ top_50_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart, data = base_afectadas)
m6_2 <- lm(success ~ top_50_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart + factor(direccion_territorial),
           data = base_afectadas)

m7_2 <- lm(success ~ top_100_dummy, data = base_afectadas)
m8_2 <- lm(success ~ top_100_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart, data = base_afectadas)
m9_2 <- lm(success ~ top_100_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart + factor(direccion_territorial),
           data = base_afectadas)

## PRODUCES TABLE 2 - OVERLEAF
stargazer(
  m1_2, m2_2, m3_2, m4_2, m5_2, m6_2, m7_2, m8_2, m9_2,
  digits    = 2,
  omit.stat = c("f", "ser"),
  omit      = "factor\\(direccion_territorial\\)"
)

#### Replicates Table 3 in paper: outcome variable: time in administrative stage.

mean(base_afectadas$tiempo_admin_both)
sd(base_afectadas$tiempo_admin_both)

m1_3 <- lm(tiempo_admin_both ~ log(num_titulos + 1), data = base_afectadas)
m2_3 <- lm(tiempo_admin_both ~ log(num_titulos + 1) + Familias +
             comunidad_negra + hectareas_afectadas_cart,
           data = base_afectadas)
m3_3 <- lm(tiempo_admin_both ~ log(num_titulos + 1) + Familias +
             comunidad_negra + hectareas_afectadas_cart +
             factor(direccion_territorial),
           data = base_afectadas)

m4_3 <- lm(tiempo_admin_both ~ top_50_dummy, data = base_afectadas)
m5_3 <- lm(tiempo_admin_both ~ top_50_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart,
           data = base_afectadas)
m6_3 <- lm(tiempo_admin_both ~ top_50_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart + factor(direccion_territorial),
           data = base_afectadas)

m7_3 <- lm(tiempo_admin_both ~ top_100_dummy, data = base_afectadas)
m8_3 <- lm(tiempo_admin_both ~ top_100_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart,
           data = base_afectadas)
m9_3 <- lm(tiempo_admin_both ~ top_100_dummy + Familias + comunidad_negra +
             hectareas_afectadas_cart + factor(direccion_territorial),
           data = base_afectadas)

# PRODUCES TABLE 3 - OVERLEAF
stargazer(
  m1_3, m2_3, m3_3, m4_3, m5_3, m6_3, m7_3, m8_3, m9_3,
  digits    = 2,
  omit.stat = c("f", "ser"),
  omit      = "factor\\(direccion_territorial\\)"
)

